Load R libraries
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.4, built: 2015-12-05)
## ## Copyright (C) 2005-2018 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(mice)
## Loading required package: lattice
library(miceadds)
## * miceadds 2.9-15 (2017-12-18 11:50:04)
library(ggplot2)
library(lattice)
Load vehicle data without missing target.
# Work from the project directory so the relative file paths resolve.
setwd("/Users/davidleonardi/Projects/KE5107_BayesianNetworks/")
data <- read.csv("./vehicle_safety_remove_missing_target.csv")

# These columns hold categories rather than measurements; convert them all to
# factors in one pass instead of one assignment per column.
factor_cols <- c(
  "GV_LANES", "GV_MODELYR", "GV_WGTCDTR", "OA_BAGDEPLY",
  "OA_MAIS", "OA_MANUSE", "OA_SEX", "VE_GAD1"
)
data[factor_cols] <- lapply(data[factor_cols], as.factor)

# NOTE(review): the recorded summary lists a blank level for OA_SEX and
# VE_GAD1, so empty strings appear to be kept as a category rather than NA.
# Confirm whether they should be read as missing (read.csv na.strings).
summary(data)
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY
## Min. : 670 Min. :-114.000 Min. :-145.00 Min. : 4.0
## 1st Qu.:1360 1st Qu.: -6.000 1st Qu.: -23.00 1st Qu.: 155.0
## Median :1530 Median : 0.000 Median : -15.00 Median : 305.0
## Mean :1618 Mean : 0.113 Mean : -14.75 Mean : 501.5
## 3rd Qu.:1830 3rd Qu.: 7.000 3rd Qu.: -8.00 3rd Qu.: 598.0
## Max. :4310 Max. : 118.000 Max. : 84.00 Max. :9852.0
## NA's :39 NA's :5877 NA's :5877 NA's :5877
## GV_LANES GV_MODELYR GV_OTVEHWGT GV_SPLIMIT
## 2 :7347 2000 :2670 Min. : 640 Min. : 0.00
## 4 :3922 2002 :2659 1st Qu.:1340 1st Qu.:35.00
## 3 :3520 2001 :2577 Median :1550 Median :40.00
## 5 :2981 2003 :2348 Mean :1630 Mean :40.72
## 6 : 651 2004 :2048 3rd Qu.:1840 3rd Qu.:45.00
## (Other): 779 2005 :1896 Max. :4540 Max. :75.00
## NA's : 3 (Other):5005 NA's :1992 NA's :221
## GV_WGTCDTR OA_AGE OA_BAGDEPLY
## Passenger Car :11800 Min. : 0.00 Deployed : 9017
## Truck (<=10000 lbs.): 2414 1st Qu.:25.00 Not Deployed:10186
## Truck (<=6000 lbs.) : 4989 Median :37.00
## Mean :40.23
## 3rd Qu.:52.00
## Max. :97.00
## NA's :14
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT
## Min. : 59.0 0:7319 0 : 2175 : 201 Min. : 31.0
## 1st Qu.:163.0 1:8814 1 :16643 Female:9453 1st Qu.: 64.0
## Median :170.0 2:1517 NA's: 385 Male :9549 Median : 77.0
## Mean :170.8 3: 950 Mean : 78.8
## 3rd Qu.:178.0 4: 301 3rd Qu.: 91.0
## Max. :216.0 5: 217 Max. :150.0
## NA's :2116 6: 85 NA's :2008
## VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## : 771 Min. :105.0 Min. :141.0 Min. : 5.0
## Front:11176 1st Qu.:149.0 1st Qu.:262.0 1st Qu.:115.0
## Left : 2997 Median :154.0 Median :272.0 Median :135.0
## Rear : 1629 Mean :154.8 Mean :281.1 Mean :152.2
## Right: 2630 3rd Qu.:158.0 3rd Qu.:288.0 3rd Qu.:175.0
## Max. :185.0 Max. :481.0 Max. :355.0
## NA's :219 NA's :8 NA's :1864
## GV_FOOTPRINT
## Min. :2.468
## 1st Qu.:3.925
## Median :4.200
## Mean :4.364
## 3rd Qu.:4.554
## Max. :7.795
## NA's :223
Show missing data in diagram.
# Missingness map over every column. The earlier `data[-1]` removed the first
# column (GV_CURBWGT) from the plot even though it contains missing values.
missmap(data, col = c("grey", "steelblue"), y.cex = 0.5, x.cex = 0.8)
Count the missing values in each column, largest first.
# Number of missing values per column, sorted from most to least.
# vapply() pins the result to exactly one integer per column, whereas
# sapply() silently changes its return type on unexpected input.
sort(
  vapply(data, function(column) sum(is.na(column)), integer(1)),
  decreasing = TRUE
)
## GV_DVLAT GV_DVLONG GV_ENERGY OA_HEIGHT OA_WEIGHT
## 5877 5877 5877 2116 2008
## GV_OTVEHWGT VE_PDOF_TR OA_MANUSE GV_FOOTPRINT GV_SPLIMIT
## 1992 1864 385 223 221
## VE_ORIGAVTW GV_CURBWGT OA_AGE VE_WHEELBAS GV_LANES
## 219 39 14 8 3
## GV_MODELYR GV_WGTCDTR OA_BAGDEPLY OA_MAIS OA_SEX
## 0 0 0 0 0
## VE_GAD1
## 0
Show missing data pattern.
# Tabulate every distinct combination of observed (1) / missing (0) columns.
mice::md.pattern(x = data)
## GV_MODELYR GV_WGTCDTR OA_BAGDEPLY OA_MAIS OA_SEX VE_GAD1 GV_LANES
## 10930 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 0
## 250 1 1 1 1 1 1 1
## 97 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 138 1 1 1 1 1 1 1
## 51 1 1 1 1 1 1 1
## 64 1 1 1 1 1 1 1
## 385 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1122 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 77 1 1 1 1 1 1 1
## 2436 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1
## 48 1 1 1 1 1 1 1
## 24 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 15 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 0
## 1221 1 1 1 1 1 1 1
## 35 1 1 1 1 1 1 1
## 30 1 1 1 1 1 1 1
## 69 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 969 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 15 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1
## 22 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 249 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 210 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 24 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1
## 41 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 152 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 68 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 80 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1
## 0 0 0 0 0 0 3
## VE_WHEELBAS OA_AGE GV_CURBWGT VE_ORIGAVTW GV_SPLIMIT GV_FOOTPRINT
## 10930 1 1 1 1 1 1
## 5 1 1 0 1 1 1
## 2 1 1 1 1 1 1
## 250 1 1 1 1 1 1
## 97 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 138 1 1 1 1 1 1
## 51 1 1 1 1 1 1
## 64 1 1 1 1 1 1
## 385 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 4 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 2 1 1 1 1 1 1
## 4 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 1122 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 7 1 1 1 1 0 1
## 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1
## 7 1 1 1 1 1 1
## 77 1 1 1 0 1 0
## 2436 1 1 1 1 1 1
## 25 1 1 1 1 1 1
## 20 1 1 1 1 0 1
## 5 1 0 1 1 1 1
## 48 1 1 1 1 1 1
## 24 1 1 1 1 1 1
## 4 1 1 1 0 1 0
## 1 1 1 1 0 0 0
## 3 1 1 1 0 1 0
## 2 1 1 1 0 1 0
## 1 0 1 0 1 1 0
## 8 1 1 1 0 1 0
## 15 1 1 0 1 1 1
## 1 1 1 1 1 1 1
## 1221 1 1 1 1 1 1
## 35 1 1 1 1 0 1
## 30 1 1 1 1 1 1
## 69 1 1 1 1 1 1
## 13 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 969 1 1 1 1 1 1
## 4 1 1 1 1 1 1
## 15 1 1 1 0 1 0
## 3 1 1 0 1 1 1
## 2 1 1 0 1 0 1
## 8 1 1 1 1 0 1
## 16 1 1 1 1 1 1
## 22 1 1 1 1 1 1
## 3 1 1 1 1 0 1
## 7 1 1 1 1 1 1
## 1 1 1 0 1 1 1
## 8 1 1 1 1 1 1
## 249 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 4 1 1 0 1 1 1
## 210 1 1 1 1 1 1
## 25 1 1 1 1 0 1
## 4 1 1 1 1 1 1
## 24 1 1 1 1 1 1
## 6 1 1 1 1 1 1
## 41 1 1 1 0 1 0
## 1 1 0 1 0 1 0
## 2 1 1 1 0 1 0
## 2 0 1 1 1 1 0
## 2 1 1 0 1 1 1
## 152 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 3 1 0 1 1 1 1
## 68 1 1 1 1 1 1
## 1 1 1 0 1 1 1
## 2 1 1 1 1 0 1
## 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 2 1 1 1 1 1 1
## 80 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 12 1 1 1 0 1 0
## 1 1 1 1 0 0 0
## 3 1 1 1 0 1 0
## 1 0 1 0 1 1 0
## 1 0 1 1 0 1 0
## 20 1 1 1 0 1 0
## 1 1 1 1 0 1 0
## 1 1 1 0 1 1 1
## 12 1 1 1 1 1 1
## 4 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 1 1 1 1 1 0 1
## 1 1 1 0 1 1 1
## 16 1 1 1 1 1 1
## 2 1 1 1 1 0 1
## 1 1 0 1 1 1 1
## 25 1 1 1 1 1 1
## 1 1 1 1 0 0 0
## 3 1 1 1 0 1 0
## 1 0 1 0 0 1 0
## 1 0 1 1 0 1 0
## 8 1 1 1 0 1 0
## 1 1 1 1 0 0 0
## 4 1 1 1 0 1 0
## 3 1 1 1 1 1 1
## 1 1 1 1 1 0 1
## 2 1 1 1 0 1 0
## 1 1 1 0 0 1 0
## 1 1 1 1 0 1 0
## 1 1 1 1 1 0 1
## 1 1 1 1 0 1 0
## 1 0 1 1 0 1 0
## 2 1 1 1 0 1 0
## 8 14 39 219 221 223
## OA_MANUSE VE_PDOF_TR GV_OTVEHWGT OA_WEIGHT OA_HEIGHT GV_DVLAT
## 10930 1 1 1 1 1 1
## 5 1 1 1 1 1 1
## 2 1 1 1 1 1 1
## 250 1 1 0 1 1 1
## 97 1 1 1 1 1 1
## 1 1 1 1 1 1 1
## 138 1 1 1 1 0 1
## 51 0 1 1 1 1 1
## 64 1 1 1 0 1 1
## 385 1 0 1 1 1 1
## 1 1 1 0 1 1 1
## 1 1 1 0 1 1 1
## 4 1 1 0 1 0 1
## 1 1 1 1 1 0 1
## 2 0 1 0 1 1 1
## 4 0 1 1 1 0 1
## 2 1 1 1 0 1 1
## 1122 1 1 1 0 0 1
## 1 0 1 1 0 1 1
## 1 1 0 0 1 1 1
## 7 1 0 1 1 1 1
## 1 1 0 1 1 0 1
## 5 0 0 1 1 1 1
## 7 1 0 1 0 1 1
## 77 1 1 1 1 1 1
## 2436 1 1 1 1 1 0
## 25 1 1 0 0 0 1
## 20 1 1 1 0 0 1
## 5 1 1 1 0 0 1
## 48 0 1 1 0 0 1
## 24 1 0 1 0 0 1
## 4 1 1 0 1 1 1
## 1 1 1 1 1 1 1
## 3 1 1 1 1 0 1
## 2 0 1 1 1 1 1
## 1 1 1 1 1 1 1
## 8 1 0 1 1 1 1
## 15 1 1 1 1 1 0
## 1 1 1 1 1 1 0
## 1221 1 1 0 1 1 0
## 35 1 1 1 1 1 0
## 30 1 1 1 1 0 0
## 69 0 1 1 1 1 0
## 13 1 1 1 0 1 0
## 2 0 1 1 0 0 1
## 1 0 1 1 0 0 1
## 969 1 0 1 1 1 0
## 4 0 0 1 0 0 1
## 15 1 1 1 0 0 1
## 3 1 1 0 1 1 0
## 2 1 1 1 1 1 0
## 8 1 1 0 1 1 0
## 16 1 1 0 1 0 0
## 22 0 1 0 1 1 0
## 3 0 1 1 1 1 0
## 7 0 1 1 1 0 0
## 1 1 1 1 0 1 0
## 8 1 1 0 0 1 0
## 249 1 1 1 0 0 0
## 1 0 1 1 0 1 0
## 4 1 0 1 1 1 0
## 210 1 0 0 1 1 0
## 25 1 0 1 1 1 0
## 4 1 0 1 1 0 0
## 24 0 0 1 1 1 0
## 6 1 0 1 0 1 0
## 41 1 1 1 1 1 0
## 1 1 1 1 0 0 1
## 2 0 1 1 0 0 1
## 2 1 1 1 1 1 0
## 2 1 1 1 0 0 0
## 152 1 1 0 0 0 0
## 2 1 1 1 0 0 0
## 3 1 1 1 0 0 0
## 68 0 1 1 0 0 0
## 1 1 0 0 1 1 0
## 2 1 0 0 1 1 0
## 1 1 0 0 1 0 0
## 3 0 0 0 1 1 0
## 1 0 0 1 1 1 0
## 2 0 0 1 1 0 0
## 80 1 0 1 0 0 0
## 1 0 0 1 0 1 0
## 12 1 1 0 1 1 0
## 1 1 1 1 1 1 0
## 3 0 1 1 1 1 0
## 1 1 1 1 1 1 0
## 1 1 1 1 1 1 0
## 20 1 0 1 1 1 0
## 1 0 0 1 0 0 1
## 1 0 1 1 0 0 0
## 12 0 1 0 0 0 0
## 4 0 1 1 0 0 0
## 1 0 1 1 0 0 0
## 1 0 0 1 1 0 0
## 1 1 0 1 0 0 0
## 16 1 0 0 0 0 0
## 2 1 0 1 0 0 0
## 1 1 0 1 0 0 0
## 25 0 0 1 0 0 0
## 1 1 1 0 1 1 0
## 3 1 1 1 0 0 0
## 1 1 1 1 1 1 0
## 1 0 1 1 1 1 0
## 8 1 0 0 1 1 0
## 1 1 0 1 1 1 0
## 4 0 0 1 1 1 0
## 3 0 0 0 0 0 0
## 1 0 0 1 0 0 0
## 2 1 1 0 0 0 0
## 1 1 0 0 1 1 0
## 1 1 0 1 0 0 0
## 1 0 0 0 0 0 0
## 1 0 1 0 0 0 0
## 1 0 1 1 0 0 0
## 2 0 0 1 0 0 0
## 385 1864 1992 2008 2116 5877
## GV_DVLONG GV_ENERGY
## 10930 1 1 0
## 5 1 1 1
## 2 1 1 1
## 250 1 1 1
## 97 1 1 1
## 1 1 1 1
## 138 1 1 1
## 51 1 1 1
## 64 1 1 1
## 385 1 1 1
## 1 1 1 2
## 1 1 1 2
## 4 1 1 2
## 1 1 1 2
## 2 1 1 2
## 4 1 1 2
## 2 1 1 2
## 1122 1 1 2
## 1 1 1 2
## 1 1 1 2
## 7 1 1 2
## 1 1 1 2
## 5 1 1 2
## 7 1 1 2
## 77 1 1 2
## 2436 0 0 3
## 25 1 1 3
## 20 1 1 3
## 5 1 1 3
## 48 1 1 3
## 24 1 1 3
## 4 1 1 3
## 1 1 1 3
## 3 1 1 3
## 2 1 1 3
## 1 1 1 3
## 8 1 1 3
## 15 0 0 4
## 1 0 0 4
## 1221 0 0 4
## 35 0 0 4
## 30 0 0 4
## 69 0 0 4
## 13 0 0 4
## 2 1 1 4
## 1 1 1 4
## 969 0 0 4
## 4 1 1 4
## 15 1 1 4
## 3 0 0 5
## 2 0 0 5
## 8 0 0 5
## 16 0 0 5
## 22 0 0 5
## 3 0 0 5
## 7 0 0 5
## 1 0 0 5
## 8 0 0 5
## 249 0 0 5
## 1 0 0 5
## 4 0 0 5
## 210 0 0 5
## 25 0 0 5
## 4 0 0 5
## 24 0 0 5
## 6 0 0 5
## 41 0 0 5
## 1 1 1 5
## 2 1 1 5
## 2 0 0 5
## 2 0 0 6
## 152 0 0 6
## 2 0 0 6
## 3 0 0 6
## 68 0 0 6
## 1 0 0 6
## 2 0 0 6
## 1 0 0 6
## 3 0 0 6
## 1 0 0 6
## 2 0 0 6
## 80 0 0 6
## 1 0 0 6
## 12 0 0 6
## 1 0 0 6
## 3 0 0 6
## 1 0 0 6
## 1 0 0 6
## 20 0 0 6
## 1 1 1 6
## 1 0 0 7
## 12 0 0 7
## 4 0 0 7
## 1 0 0 7
## 1 0 0 7
## 1 0 0 7
## 16 0 0 7
## 2 0 0 7
## 1 0 0 7
## 25 0 0 7
## 1 0 0 7
## 3 0 0 7
## 1 0 0 7
## 1 0 0 7
## 8 0 0 7
## 1 0 0 7
## 4 0 0 7
## 3 0 0 8
## 1 0 0 8
## 2 0 0 8
## 1 0 0 8
## 1 0 0 8
## 1 0 0 9
## 1 0 0 9
## 1 0 0 9
## 2 0 0 9
## 5877 5877 26723
Use mice with Decision Tree to impute missing data.
# Fix the RNG state before imputing: no seed is passed to mice(), so without
# this the imputed values (and the files written from them) change on every
# run. The mice() call itself is left as it was.
set.seed(5107)

# Single imputation (m = 1) using the "cart" method for every column.
imp.data <- mice(data, m = 1, method = "cart", printFlag = FALSE)
summary(imp.data)
## Multiply imputed data set
## Call:
## mice(data = data, m = 1, method = "cart", printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 39 5877 5877 5877 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1992 221 0 14
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 2116 0 385 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 2008 0 219 8 1864
## GV_FOOTPRINT
## 223
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "cart" "cart" "cart" "cart" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "cart" "cart" "cart" "cart" "cart"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "cart" "cart" "cart" "cart" "cart"
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "cart" "cart" "cart" "cart" "cart"
## GV_FOOTPRINT
## "cart"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
Write the imputed values from the decision tree model.
# Export the CART imputation into the "mice_imp_cart" output folder.
miceadds::write.mice.imputation(
  mi.res = imp.data,
  name = "mice_imp_cart"
)
## 2018-03-15 22:01:30
##
## /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_cart
##
## Multiply imputed data set
## Call:
## mice(data = data, m = 1, method = "cart", printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 39 5877 5877 5877 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1992 221 0 14
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 2116 0 385 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 2008 0 219 8 1864
## GV_FOOTPRINT
## 223
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "cart" "cart" "cart" "cart" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "cart" "cart" "cart" "cart" "cart"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "cart" "cart" "cart" "cart" "cart"
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "cart" "cart" "cart" "cart" "cart"
## GV_FOOTPRINT
## "cart"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
## NULL
##
##
##
## To cite R in publications use:
##
## R Core Team (2017). R: A language and environment for
## statistical computing. R Foundation for Statistical Computing,
## Vienna, Austria. URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2017},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please
## cite it when using it for data analysis. See also
## 'citation("pkgname")' for citing R packages.
##
##
## To cite mice in publications use:
##
## Stef van Buuren, Karin Groothuis-Oudshoorn (2011). mice:
## Multivariate Imputation by Chained Equations in R. Journal of
## Statistical Software, 45(3), 1-67. URL
## http://www.jstatsoft.org/v45/i03/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {{mice}: Multivariate Imputation by Chained Equations in R},
## author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn},
## journal = {Journal of Statistical Software},
## year = {2011},
## volume = {45},
## number = {3},
## pages = {1--67},
## url = {http://www.jstatsoft.org/v45/i03/},
## }
##
## sysname
## "Darwin"
## release
## "16.7.0"
## version
## "Darwin Kernel Version 16.7.0: Thu Jan 11 22:59:40 PST 2018; root:xnu-3789.73.8~1/RELEASE_X86_64"
## nodename
## "Chatarinas-MacBook-Pro.local"
## machine
## "x86_64"
## login
## "davidleonardi"
## user
## "davidleonardi"
## effective_user
## "davidleonardi"
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_2.2.1 miceadds_2.9-15 mice_2.46.0 lattice_0.20-35
## [5] Amelia_1.7.4 Rcpp_0.12.15
##
## loaded via a namespace (and not attached):
## [1] lavaan_0.5-23.1097 mitools_2.3 splines_3.4.3
## [4] colorspace_1.3-2 htmltools_0.3.6 stats4_3.4.3
## [7] yaml_2.1.17 mgcv_1.8-22 rlang_0.2.0
## [10] survival_2.41-3 pillar_1.2.1 nloptr_1.0.4
## [13] foreign_0.8-69 lavaan.survey_1.1.3.1 plyr_1.8.4
## [16] mirt_1.26.3 GPArotation_2014.11-1 stringr_1.3.0
## [19] munsell_0.4.3 CDM_6.1-10 gtable_0.2.0
## [22] mvtnorm_1.0-7 coda_0.19-1 evaluate_0.10.1
## [25] knitr_1.20 permute_0.9-4 sirt_2.5-45
## [28] parallel_3.4.3 backports_1.1.2 scales_0.5.0
## [31] vegan_2.4-6 lme4_1.1-15 polycor_0.7-9
## [34] mnormt_1.5-5 digest_0.6.15 stringi_1.1.6
## [37] survey_3.33-2 grid_3.4.3 rprojroot_1.3-2
## [40] quadprog_1.5-5 tools_3.4.3 magrittr_1.5
## [43] lazyeval_0.2.1 tibble_1.4.2 cluster_2.0.6
## [46] pbivnorm_0.6.0 TAM_2.9-35 MASS_7.3-47
## [49] Matrix_1.2-12 minqa_1.2.4 rmarkdown_1.9
## [52] rpart_4.1-11 sfsmisc_1.1-2 nnet_7.3-12
## [55] nlme_3.1-131 compiler_3.4.3
##
## 1
## Data values written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_cart/mice_imp_cart__SPSS.txt
## Syntax file written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_cart/mice_imp_cart__SPSS.sps
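Compare it with mean-value imputation: numeric columns are filled with their column mean, while the categorical columns still use the decision tree.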
# Fix the RNG state before imputing: the numeric columns get a deterministic
# mean, but the factor columns still use "cart", so without a seed the result
# changes on every run. The mice() call itself is left as it was.
set.seed(5107)

# defaultMethod is ordered (numeric, binary factor, unordered factor,
# ordered factor): mean for numeric columns, CART for all factor types.
imp.data_raw_mean <- mice(
  data,
  m = 1,
  defaultMethod = c("mean", "cart", "cart", "cart"),
  printFlag = FALSE
)
summary(imp.data_raw_mean)
## Multiply imputed data set
## Call:
## mice(data = data, m = 1, defaultMethod = c("mean", "cart", "cart",
## "cart"), printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 39 5877 5877 5877 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1992 221 0 14
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 2116 0 385 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 2008 0 219 8 1864
## GV_FOOTPRINT
## 223
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "mean" "mean" "mean" "mean" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "" "mean" "mean" "" "mean"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "" "mean" "" "cart" ""
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "mean" "" "mean" "mean" "mean"
## GV_FOOTPRINT
## "mean"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
Write the imputed values from the mean-imputation model.
# Export the mean-based imputation into the "mice_imp_mean" output folder.
miceadds::write.mice.imputation(
  mi.res = imp.data_raw_mean,
  name = "mice_imp_mean"
)
## 2018-03-15 22:02:44
##
## /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_mean
##
## Multiply imputed data set
## Call:
## mice(data = data, m = 1, defaultMethod = c("mean", "cart", "cart",
## "cart"), printFlag = FALSE)
## Number of multiple imputations: 1
## Missing cells per column:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 39 5877 5877 5877 3
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## 0 1992 221 0 14
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## 0 2116 0 385 0
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## 2008 0 219 8 1864
## GV_FOOTPRINT
## 223
## Imputation methods:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## "mean" "mean" "mean" "mean" "cart"
## GV_MODELYR GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE
## "" "mean" "mean" "" "mean"
## OA_BAGDEPLY OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX
## "" "mean" "" "cart" ""
## OA_WEIGHT VE_GAD1 VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR
## "mean" "" "mean" "mean" "mean"
## GV_FOOTPRINT
## "mean"
## VisitSequence:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES
## 1 2 3 4 5
## GV_OTVEHWGT GV_SPLIMIT OA_AGE OA_HEIGHT OA_MANUSE
## 7 8 10 12 14
## OA_WEIGHT VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## 16 18 19 20 21
## PredictorMatrix:
## GV_CURBWGT GV_DVLAT GV_DVLONG GV_ENERGY GV_LANES GV_MODELYR
## GV_CURBWGT 0 1 1 1 1 1
## GV_DVLAT 1 0 1 1 1 1
## GV_DVLONG 1 1 0 1 1 1
## GV_ENERGY 1 1 1 0 1 1
## GV_LANES 1 1 1 1 0 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## GV_OTVEHWGT GV_SPLIMIT GV_WGTCDTR OA_AGE OA_BAGDEPLY
## GV_CURBWGT 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1
## GV_LANES 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0
## GV_OTVEHWGT 0 1 1 1 1
## GV_SPLIMIT 1 0 1 1 1
## GV_WGTCDTR 0 0 0 0 0
## OA_AGE 1 1 1 0 1
## OA_BAGDEPLY 0 0 0 0 0
## OA_HEIGHT 1 1 1 1 1
## OA_MAIS 0 0 0 0 0
## OA_MANUSE 1 1 1 1 1
## OA_SEX 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 1
## VE_GAD1 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1
## OA_HEIGHT OA_MAIS OA_MANUSE OA_SEX OA_WEIGHT VE_GAD1
## GV_CURBWGT 1 1 1 1 1 1
## GV_DVLAT 1 1 1 1 1 1
## GV_DVLONG 1 1 1 1 1 1
## GV_ENERGY 1 1 1 1 1 1
## GV_LANES 1 1 1 1 1 1
## GV_MODELYR 0 0 0 0 0 0
## GV_OTVEHWGT 1 1 1 1 1 1
## GV_SPLIMIT 1 1 1 1 1 1
## GV_WGTCDTR 0 0 0 0 0 0
## OA_AGE 1 1 1 1 1 1
## OA_BAGDEPLY 0 0 0 0 0 0
## OA_HEIGHT 0 1 1 1 1 1
## OA_MAIS 0 0 0 0 0 0
## OA_MANUSE 1 1 0 1 1 1
## OA_SEX 0 0 0 0 0 0
## OA_WEIGHT 1 1 1 1 0 1
## VE_GAD1 0 0 0 0 0 0
## VE_ORIGAVTW 1 1 1 1 1 1
## VE_WHEELBAS 1 1 1 1 1 1
## VE_PDOF_TR 1 1 1 1 1 1
## GV_FOOTPRINT 1 1 1 1 1 1
## VE_ORIGAVTW VE_WHEELBAS VE_PDOF_TR GV_FOOTPRINT
## GV_CURBWGT 1 1 1 1
## GV_DVLAT 1 1 1 1
## GV_DVLONG 1 1 1 1
## GV_ENERGY 1 1 1 1
## GV_LANES 1 1 1 1
## GV_MODELYR 0 0 0 0
## GV_OTVEHWGT 1 1 1 1
## GV_SPLIMIT 1 1 1 1
## GV_WGTCDTR 0 0 0 0
## OA_AGE 1 1 1 1
## OA_BAGDEPLY 0 0 0 0
## OA_HEIGHT 1 1 1 1
## OA_MAIS 0 0 0 0
## OA_MANUSE 1 1 1 1
## OA_SEX 0 0 0 0
## OA_WEIGHT 1 1 1 1
## VE_GAD1 0 0 0 0
## VE_ORIGAVTW 0 1 1 1
## VE_WHEELBAS 1 0 1 1
## VE_PDOF_TR 1 1 0 1
## GV_FOOTPRINT 1 1 1 0
## Random generator seed value: NA
## NULL
##
##
##
## To cite R in publications use:
##
## R Core Team (2017). R: A language and environment for
## statistical computing. R Foundation for Statistical Computing,
## Vienna, Austria. URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2017},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please
## cite it when using it for data analysis. See also
## 'citation("pkgname")' for citing R packages.
##
##
## To cite mice in publications use:
##
## Stef van Buuren, Karin Groothuis-Oudshoorn (2011). mice:
## Multivariate Imputation by Chained Equations in R. Journal of
## Statistical Software, 45(3), 1-67. URL
## http://www.jstatsoft.org/v45/i03/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {{mice}: Multivariate Imputation by Chained Equations in R},
## author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn},
## journal = {Journal of Statistical Software},
## year = {2011},
## volume = {45},
## number = {3},
## pages = {1--67},
## url = {http://www.jstatsoft.org/v45/i03/},
## }
##
## sysname
## "Darwin"
## release
## "16.7.0"
## version
## "Darwin Kernel Version 16.7.0: Thu Jan 11 22:59:40 PST 2018; root:xnu-3789.73.8~1/RELEASE_X86_64"
## nodename
## "Chatarinas-MacBook-Pro.local"
## machine
## "x86_64"
## login
## "davidleonardi"
## user
## "davidleonardi"
## effective_user
## "davidleonardi"
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_2.2.1 miceadds_2.9-15 mice_2.46.0 lattice_0.20-35
## [5] Amelia_1.7.4 Rcpp_0.12.15
##
## loaded via a namespace (and not attached):
## [1] lavaan_0.5-23.1097 mitools_2.3 splines_3.4.3
## [4] colorspace_1.3-2 htmltools_0.3.6 stats4_3.4.3
## [7] yaml_2.1.17 mgcv_1.8-22 rlang_0.2.0
## [10] survival_2.41-3 pillar_1.2.1 nloptr_1.0.4
## [13] foreign_0.8-69 lavaan.survey_1.1.3.1 plyr_1.8.4
## [16] mirt_1.26.3 GPArotation_2014.11-1 stringr_1.3.0
## [19] munsell_0.4.3 CDM_6.1-10 gtable_0.2.0
## [22] mvtnorm_1.0-7 coda_0.19-1 evaluate_0.10.1
## [25] knitr_1.20 permute_0.9-4 sirt_2.5-45
## [28] parallel_3.4.3 backports_1.1.2 scales_0.5.0
## [31] vegan_2.4-6 lme4_1.1-15 polycor_0.7-9
## [34] mnormt_1.5-5 digest_0.6.15 stringi_1.1.6
## [37] survey_3.33-2 grid_3.4.3 rprojroot_1.3-2
## [40] quadprog_1.5-5 tools_3.4.3 magrittr_1.5
## [43] lazyeval_0.2.1 tibble_1.4.2 cluster_2.0.6
## [46] pbivnorm_0.6.0 TAM_2.9-35 MASS_7.3-47
## [49] Matrix_1.2-12 minqa_1.2.4 rmarkdown_1.9
## [52] rpart_4.1-11 sfsmisc_1.1-2 nnet_7.3-12
## [55] nlme_3.1-131 compiler_3.4.3
##
## 1
## Data values written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_mean/mice_imp_mean__SPSS.txt
## Syntax file written to /Users/davidleonardi/Projects/KE5107_BayesianNetworks/mice_imp_mean/mice_imp_mean__SPSS.sps
Plot GV_DVLAT and VE_PDOF_TR for imputed data using Decision Tree.
# CART imputation: GV_DVLAT against VE_PDOF_TR.
lattice::xyplot(x = imp.data, data = GV_DVLAT ~ VE_PDOF_TR)
Plot GV_DVLONG and VE_PDOF_TR for imputed data using Decision Tree.
# CART imputation: GV_DVLONG against VE_PDOF_TR.
lattice::xyplot(x = imp.data, data = GV_DVLONG ~ VE_PDOF_TR)
Plot GV_DVLAT and GV_FOOTPRINT for imputed data using Decision Tree.
# CART imputation: GV_DVLAT against GV_FOOTPRINT.
lattice::xyplot(x = imp.data, data = GV_DVLAT ~ GV_FOOTPRINT)
Plot GV_DVLONG and GV_FOOTPRINT for imputed data using Decision Tree.
# CART imputation: GV_DVLONG against GV_FOOTPRINT.
lattice::xyplot(x = imp.data, data = GV_DVLONG ~ GV_FOOTPRINT)
Plot GV_DVLAT and VE_PDOF_TR for imputed data using Mean value.
# Mean imputation: GV_DVLAT against VE_PDOF_TR.
lattice::xyplot(x = imp.data_raw_mean, data = GV_DVLAT ~ VE_PDOF_TR)
Plot GV_DVLONG and VE_PDOF_TR for imputed data using Mean value.
# Mean imputation: GV_DVLONG against VE_PDOF_TR.
lattice::xyplot(x = imp.data_raw_mean, data = GV_DVLONG ~ VE_PDOF_TR)
Plot GV_DVLAT and GV_FOOTPRINT for imputed data using Mean value.
# Mean imputation: GV_DVLAT against GV_FOOTPRINT.
lattice::xyplot(x = imp.data_raw_mean, data = GV_DVLAT ~ GV_FOOTPRINT)
Plot GV_DVLONG and GV_FOOTPRINT for imputed data using Mean value.
# Mean imputation: GV_DVLONG against GV_FOOTPRINT.
lattice::xyplot(x = imp.data_raw_mean, data = GV_DVLONG ~ GV_FOOTPRINT)
Save the imputed data.
# Extract the (only) completed data set from the CART imputation and save it.
data_complete <- mice::complete(imp.data, action = 1)

# NOTE(review): row names are written as an unnamed first column here;
# confirm downstream readers expect that before adding row.names = FALSE.
write.csv(x = data_complete, file = "./vehicle_safety_imputed_data.csv")